{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Forward Stepwise Regression"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yahoo finance is used to fetch data \n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.313Z",
          "iopub.execute_input": "2022-06-16T01:52:21.318Z",
          "iopub.status.idle": "2022-06-16T01:52:21.325Z",
          "shell.execute_reply": "2022-06-16T01:52:21.281Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Ticker and date window for the price history\n",
        "symbol, start, end = 'AMD', '2017-01-01', '2017-12-31'\n",
        "\n",
        "# Fetch the price/volume bars for that window from Yahoo Finance\n",
        "dataset = yf.download(symbol, start=start, end=end)\n",
        "\n",
        "# Preview the first rows and the available columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "             Open   High    Low  Close  Adj Close    Volume\nDate                                                       \n2017-01-03  11.42  11.65  11.02  11.43      11.43  55182000\n2017-01-04  11.45  11.52  11.24  11.43      11.43  40781200\n2017-01-05  11.43  11.69  11.23  11.24      11.24  38855200\n2017-01-06  11.29  11.49  11.11  11.32      11.32  34453500\n2017-01-09  11.37  11.64  11.31  11.49      11.49  37304800",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2017-01-03</th>\n      <td>11.42</td>\n      <td>11.65</td>\n      <td>11.02</td>\n      <td>11.43</td>\n      <td>11.43</td>\n      <td>55182000</td>\n    </tr>\n    <tr>\n      <th>2017-01-04</th>\n      <td>11.45</td>\n      <td>11.52</td>\n      <td>11.24</td>\n      <td>11.43</td>\n      <td>11.43</td>\n      <td>40781200</td>\n    </tr>\n    <tr>\n      <th>2017-01-05</th>\n      <td>11.43</td>\n      <td>11.69</td>\n      <td>11.23</td>\n      <td>11.24</td>\n      <td>11.24</td>\n      <td>38855200</td>\n    </tr>\n    <tr>\n      <th>2017-01-06</th>\n      <td>11.29</td>\n      <td>11.49</td>\n      <td>11.11</td>\n      <td>11.32</td>\n      <td>11.32</td>\n      <td>34453500</td>\n    </tr>\n    <tr>\n      <th>2017-01-09</th>\n      <td>11.37</td>\n      <td>11.64</td>\n      <td>11.31</td>\n      <td>11.49</td>\n      <td>11.49</td>\n      <td>37304800</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.333Z",
          "iopub.execute_input": "2022-06-16T01:52:21.338Z",
          "iopub.status.idle": "2022-06-16T01:52:21.407Z",
          "shell.execute_reply": "2022-06-16T01:52:21.513Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create more data (disabled in this section)\n",
        "# NOTE(review): forward_selected() treats every column except the response as a\n",
        "# candidate predictor, so these derived columns are presumably left out on purpose\n",
        "# here; the same columns are built for real in the classifier section further down.\n",
        "# dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "# dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,-1)\n",
        "# dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)\n",
        "# dataset['Return'] = dataset['Adj Close'].pct_change()\n",
        "# dataset = dataset.dropna()\n",
        "# dataset.head()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.414Z",
          "iopub.execute_input": "2022-06-16T01:52:21.419Z",
          "iopub.status.idle": "2022-06-16T01:52:21.429Z",
          "shell.execute_reply": "2022-06-16T01:52:21.517Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Formula terms must be identifier-like, so replace the space in 'Adj Close'.\n",
        "# Only the column is renamed: passing index=str as well would turn the DatetimeIndex\n",
        "# into plain strings, which nothing downstream needs.\n",
        "dataset = dataset.rename(columns={\"Adj Close\": 'Adj_Close'})\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "                      Open   High    Low  Close  Adj_Close    Volume\nDate                                                                \n2017-01-03 00:00:00  11.42  11.65  11.02  11.43      11.43  55182000\n2017-01-04 00:00:00  11.45  11.52  11.24  11.43      11.43  40781200\n2017-01-05 00:00:00  11.43  11.69  11.23  11.24      11.24  38855200\n2017-01-06 00:00:00  11.29  11.49  11.11  11.32      11.32  34453500\n2017-01-09 00:00:00  11.37  11.64  11.31  11.49      11.49  37304800",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj_Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2017-01-03 00:00:00</th>\n      <td>11.42</td>\n      <td>11.65</td>\n      <td>11.02</td>\n      <td>11.43</td>\n      <td>11.43</td>\n      <td>55182000</td>\n    </tr>\n    <tr>\n      <th>2017-01-04 00:00:00</th>\n      <td>11.45</td>\n      <td>11.52</td>\n      <td>11.24</td>\n      <td>11.43</td>\n      <td>11.43</td>\n      <td>40781200</td>\n    </tr>\n    <tr>\n      <th>2017-01-05 00:00:00</th>\n      <td>11.43</td>\n      <td>11.69</td>\n      <td>11.23</td>\n      <td>11.24</td>\n      <td>11.24</td>\n      <td>38855200</td>\n    </tr>\n    <tr>\n      <th>2017-01-06 00:00:00</th>\n      <td>11.29</td>\n      <td>11.49</td>\n      <td>11.11</td>\n      <td>11.32</td>\n      <td>11.32</td>\n      <td>34453500</td>\n    </tr>\n    <tr>\n      <th>2017-01-09 00:00:00</th>\n      <td>11.37</td>\n      <td>11.64</td>\n      <td>11.31</td>\n      <td>11.49</td>\n      <td>11.49</td>\n      <td>37304800</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.437Z",
          "iopub.execute_input": "2022-06-16T01:52:21.443Z",
          "iopub.status.idle": "2022-06-16T01:52:21.456Z",
          "shell.execute_reply": "2022-06-16T01:52:21.521Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# (rows, columns) of the downloaded frame\n",
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "(251, 6)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.464Z",
          "iopub.execute_input": "2022-06-16T01:52:21.470Z",
          "iopub.status.idle": "2022-06-16T01:52:21.482Z",
          "shell.execute_reply": "2022-06-16T01:52:21.525Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import statsmodels.formula.api as smf\n",
        "\n",
        "def forward_selected(data, response):\n",
        "    \"\"\"Linear model designed by forward selection.\n",
        "\n",
        "    Starts from an intercept-only model and repeatedly adds the one\n",
        "    remaining predictor that raises adjusted R-squared the most. The\n",
        "    search stops as soon as no remaining predictor improves the score.\n",
        "\n",
        "    Parameters:\n",
        "    -----------\n",
        "    data : pandas DataFrame with all possible predictors and response\n",
        "\n",
        "    response: string, name of response column in data\n",
        "\n",
        "    Returns:\n",
        "    --------\n",
        "    model: an \"optimal\" fitted statsmodels linear model\n",
        "           with an intercept\n",
        "           selected by forward selection\n",
        "           evaluated by adjusted R-squared\n",
        "\n",
        "    Raises:\n",
        "    -------\n",
        "    KeyError: if response is not a column of data\n",
        "    \"\"\"\n",
        "    import keyword\n",
        "\n",
        "    def _term(name):\n",
        "        # Names that are not plain identifiers (e.g. 'Adj Close') cannot be\n",
        "        # written bare in a formula, so wrap them in patsy's Q() quoting.\n",
        "        # Identifier-like names are passed through unchanged.\n",
        "        if name.isidentifier() and not keyword.iskeyword(name):\n",
        "            return name\n",
        "        return 'Q({!r})'.format(name)\n",
        "\n",
        "    remaining = set(data.columns)\n",
        "    remaining.remove(response)  # KeyError when response is not a column\n",
        "    lhs = _term(response)\n",
        "    selected = []\n",
        "    current_score = 0.0\n",
        "    while remaining:\n",
        "        scores_with_candidates = []\n",
        "        for candidate in remaining:\n",
        "            terms = selected + [_term(candidate), '1']\n",
        "            formula = \"{} ~ {}\".format(lhs, ' + '.join(terms))\n",
        "            score = smf.ols(formula, data).fit().rsquared_adj\n",
        "            scores_with_candidates.append((score, candidate))\n",
        "        # Highest score wins; ties on score fall back to the candidate name.\n",
        "        best_new_score, best_candidate = max(scores_with_candidates)\n",
        "        # Stop unless the score strictly improves (this also covers NaN).\n",
        "        # An equality-based loop test would spin forever when the best\n",
        "        # candidate exactly ties the current score.\n",
        "        if not best_new_score > current_score:\n",
        "            break\n",
        "        remaining.remove(best_candidate)\n",
        "        selected.append(_term(best_candidate))\n",
        "        current_score = best_new_score\n",
        "    # Joining with the intercept term keeps the formula valid ('y ~ 1')\n",
        "    # even when no predictor was selected.\n",
        "    formula = \"{} ~ {}\".format(lhs, ' + '.join(selected + ['1']))\n",
        "    model = smf.ols(formula, data).fit()\n",
        "    return model"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.490Z",
          "iopub.execute_input": "2022-06-16T01:52:21.495Z",
          "shell.execute_reply": "2022-06-16T01:52:21.785Z",
          "iopub.status.idle": "2022-06-16T01:52:21.762Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Run forward selection with Volume as the response; every other column is a candidate predictor\n",
        "model = forward_selected(dataset, 'Volume')"
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.771Z",
          "iopub.execute_input": "2022-06-16T01:52:21.778Z",
          "iopub.status.idle": "2022-06-16T01:52:21.843Z",
          "shell.execute_reply": "2022-06-16T01:52:21.835Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Selected formula on the first line, its adjusted R-squared on the second\n",
        "fitted_formula = model.model.formula\n",
        "adjusted_r2 = model.rsquared_adj\n",
        "print(fitted_formula, adjusted_r2, sep='\\n')"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Volume ~ High + Low + Close + Adj_Close + 1\n",
            "0.6403456692998204\n"
          ]
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.852Z",
          "iopub.execute_input": "2022-06-16T01:52:21.858Z",
          "iopub.status.idle": "2022-06-16T01:52:21.876Z",
          "shell.execute_reply": "2022-06-16T01:52:21.905Z"
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Another approach to forward stepwise selection: mlxtend's `SequentialFeatureSelector` wrapped around a random forest classifier."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import accuracy_score as acc\n",
        "from mlxtend.feature_selection import SequentialFeatureSelector as sfs"
      ],
      "outputs": [],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:21.885Z",
          "iopub.execute_input": "2022-06-16T01:52:21.891Z",
          "shell.execute_reply": "2022-06-16T01:52:22.538Z",
          "iopub.status.idle": "2022-06-16T01:52:22.515Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Re-download so the frame has its original column names again ('Adj Close' is used below)\n",
        "dataset = yf.download(symbol,start,end)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:22.526Z",
          "iopub.execute_input": "2022-06-16T01:52:22.532Z",
          "shell.execute_reply": "2022-06-16T01:52:22.783Z",
          "iopub.status.idle": "2022-06-16T01:52:22.746Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create more data\n",
        "# shift(-1) looks one row ahead, so the three flags describe the NEXT day's move\n",
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,-1)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)\n",
        "dataset['Return'] = dataset['Adj Close'].pct_change()\n",
        "# The last row has no next day: its shift(-1) values are NaN, every comparison is\n",
        "# False, and np.where would silently label it 0 / -1. Drop it explicitly, then drop\n",
        "# the first row, whose Return is NaN.\n",
        "dataset = dataset.iloc[:-1].dropna()\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "             Open   High    Low  Close  Adj Close    Volume  \\\nDate                                                          \n2017-01-04  11.45  11.52  11.24  11.43      11.43  40781200   \n2017-01-05  11.43  11.69  11.23  11.24      11.24  38855200   \n2017-01-06  11.29  11.49  11.11  11.32      11.32  34453500   \n2017-01-09  11.37  11.64  11.31  11.49      11.49  37304800   \n2017-01-10  11.55  11.63  11.33  11.44      11.44  29201600   \n\n            Increase_Decrease  Buy_Sell_on_Open  Buy_Sell    Return  \nDate                                                                 \n2017-01-04                  0                -1        -1  0.000000  \n2017-01-05                  0                -1         1 -0.016623  \n2017-01-06                  1                 1         1  0.007117  \n2017-01-09                  0                 1        -1  0.015018  \n2017-01-10                  1                -1        -1 -0.004352  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2017-01-04</th>\n      <td>11.45</td>\n      <td>11.52</td>\n      <td>11.24</td>\n      <td>11.43</td>\n      <td>11.43</td>\n      <td>40781200</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>2017-01-05</th>\n      <td>11.43</td>\n      <td>11.69</td>\n      <td>11.23</td>\n      <td>11.24</td>\n      <td>11.24</td>\n      <td>38855200</td>\n      <td>0</td>\n      <td>-1</td>\n      <td>1</td>\n      <td>-0.016623</td>\n    </tr>\n    <tr>\n      <th>2017-01-06</th>\n      <td>11.29</td>\n      <td>11.49</td>\n      <td>11.11</td>\n      <td>11.32</td>\n      <td>11.32</td>\n      <td>34453500</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.007117</td>\n    </tr>\n    <tr>\n      <th>2017-01-09</th>\n      <td>11.37</td>\n      <td>11.64</td>\n      <td>11.31</td>\n      <td>11.49</td>\n      <td>11.49</td>\n      <td>37304800</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>0.015018</td>\n    </tr>\n    <tr>\n      <th>2017-01-10</th>\n      <td>11.55</td>\n      
<td>11.63</td>\n      <td>11.33</td>\n      <td>11.44</td>\n      <td>11.44</td>\n      <td>29201600</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>-0.004352</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:22.755Z",
          "iopub.execute_input": "2022-06-16T01:52:22.761Z",
          "iopub.status.idle": "2022-06-16T01:52:22.805Z",
          "shell.execute_reply": "2022-06-16T01:52:22.788Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Feature matrix as a plain NumPy array, plus the label column\n",
        "feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume','Return']\n",
        "X = dataset.loc[:, feature_columns].values\n",
        "Y = dataset['Buy_Sell'] # Cannot be Continuous"
      ],
      "outputs": [],
      "execution_count": 12,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:22.814Z",
          "iopub.execute_input": "2022-06-16T01:52:22.820Z",
          "iopub.status.idle": "2022-06-16T01:52:22.826Z",
          "shell.execute_reply": "2022-06-16T01:52:22.792Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Train/test split\n",
        "# NOTE(review): train_test_split shuffles rows by default, so test days are interleaved\n",
        "# with training days; for time-ordered prices a chronological split may be preferable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)\n",
        "\n",
        "# Flatten the labels to 1-D NumPy arrays. np.ravel is used instead of Series.ravel(),\n",
        "# which pandas deprecated in 2.2.\n",
        "y_train = np.ravel(y_train)\n",
        "y_test = np.ravel(y_test)\n",
        "\n",
        "print('Training dataset shape:', X_train.shape, y_train.shape)\n",
        "print('Testing dataset shape:', X_test.shape, y_test.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Training dataset shape: (187, 6) (187,)\n",
            "Testing dataset shape: (63, 6) (63,)\n"
          ]
        }
      ],
      "execution_count": 13,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:22.834Z",
          "iopub.execute_input": "2022-06-16T01:52:22.842Z",
          "iopub.status.idle": "2022-06-16T01:52:22.867Z",
          "shell.execute_reply": "2022-06-16T01:52:22.938Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Build RF classifier to use in feature selection\n",
        "# random_state pins the forest so the selected feature subset is reproducible across runs\n",
        "clf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)\n",
        "\n",
        "# Build step forward feature selection\n",
        "sfs1 = sfs(clf,\n",
        "           k_features=5,\n",
        "           forward=True,\n",
        "           floating=False,\n",
        "           verbose=2,\n",
        "           scoring='accuracy',\n",
        "           cv=5)\n",
        "\n",
        "# Perform SFS (plain forward selection: forward=True, floating=False)\n",
        "sfs1 = sfs1.fit(X_train, y_train)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
            "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.2s remaining:    0.0s\n",
            "[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    9.0s finished\n",
            "\n",
            "[2022-06-15 18:52:31] Features: 1/5 -- score: 0.556330014224751[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
            "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s\n",
            "[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    5.7s finished\n",
            "\n",
            "[2022-06-15 18:52:37] Features: 2/5 -- score: 0.55049786628734[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
            "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s\n",
            "[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    4.5s finished\n",
            "\n",
            "[2022-06-15 18:52:42] Features: 3/5 -- score: 0.5125177809388336[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
            "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s\n",
            "[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    3.4s finished\n",
            "\n",
            "[2022-06-15 18:52:45] Features: 4/5 -- score: 0.5071123755334281[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
            "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s\n",
            "[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.2s finished\n",
            "\n",
            "[2022-06-15 18:52:48] Features: 5/5 -- score: 0.4860597439544808"
          ]
        }
      ],
      "execution_count": 14,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:22.887Z",
          "iopub.execute_input": "2022-06-16T01:52:22.902Z",
          "shell.execute_reply": "2022-06-16T01:52:48.173Z",
          "iopub.status.idle": "2022-06-16T01:52:48.093Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Which features?\n",
        "# k_feature_idx_ holds column positions within X_train (the array the selector was fitted on)\n",
        "feat_cols = list(sfs1.k_feature_idx_)\n",
        "print(feat_cols)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0, 1, 2, 3, 5]\n"
          ]
        }
      ],
      "execution_count": 15,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:48.105Z",
          "iopub.execute_input": "2022-06-16T01:52:48.115Z",
          "iopub.status.idle": "2022-06-16T01:52:48.128Z",
          "shell.execute_reply": "2022-06-16T01:52:48.176Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import mean_squared_error, r2_score"
      ],
      "outputs": [],
      "execution_count": 16,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:48.136Z",
          "iopub.execute_input": "2022-06-16T01:52:48.142Z",
          "iopub.status.idle": "2022-06-16T01:52:48.150Z",
          "shell.execute_reply": "2022-06-16T01:52:48.180Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Refit a forest using only the columns chosen by the selector\n",
        "X_train_selected = X_train[:, feat_cols]\n",
        "X_test_selected = X_test[:, feat_cols]\n",
        "\n",
        "clf = RandomForestClassifier(n_estimators=1000, random_state=42, max_depth=4)\n",
        "clf = clf.fit(X_train_selected, y_train)\n",
        "\n",
        "# In-sample accuracy first, then held-out accuracy\n",
        "y_train_pred = clf.predict(X_train_selected)\n",
        "y_test_pred = clf.predict(X_test_selected)\n",
        "print('Training accuracy on selected features: %.3f' % acc(y_train, y_train_pred))\n",
        "print('Testing accuracy on selected features: %.3f' % acc(y_test, y_test_pred))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Training accuracy on selected features: 0.861\n",
            "Testing accuracy on selected features: 0.460\n"
          ]
        }
      ],
      "execution_count": 17,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:48.160Z",
          "iopub.execute_input": "2022-06-16T01:52:48.167Z",
          "iopub.status.idle": "2022-06-16T01:52:49.153Z",
          "shell.execute_reply": "2022-06-16T01:52:49.162Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Baseline for comparison: same forest settings, trained on every feature column\n",
        "clf = RandomForestClassifier(n_estimators=1000, random_state=42, max_depth=4)\n",
        "clf = clf.fit(X_train, y_train)\n",
        "\n",
        "# In-sample accuracy first, then held-out accuracy\n",
        "y_train_pred = clf.predict(X_train)\n",
        "y_test_pred = clf.predict(X_test)\n",
        "print('Training accuracy on all features: %.3f' % acc(y_train, y_train_pred))\n",
        "print('Testing accuracy on all features: %.3f' % acc(y_test, y_test_pred))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Training accuracy on all features: 0.872\n",
            "Testing accuracy on all features: 0.460\n"
          ]
        }
      ],
      "execution_count": 19,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:49.275Z",
          "iopub.execute_input": "2022-06-16T01:52:49.281Z",
          "iopub.status.idle": "2022-06-16T01:52:50.267Z",
          "shell.execute_reply": "2022-06-16T01:52:50.260Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Test-set predictions from the most recently fitted clf (the all-features forest when run in order)\n",
        "y_pred = clf.predict(X_test)"
      ],
      "outputs": [],
      "execution_count": 20,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:50.276Z",
          "iopub.execute_input": "2022-06-16T01:52:50.281Z",
          "shell.execute_reply": "2022-06-16T01:52:50.333Z",
          "iopub.status.idle": "2022-06-16T01:52:50.340Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# mean_squared_error returns the MSE, so the label must say so (not MAE).\n",
        "# On -1/+1 class labels every miss contributes (+/-2)^2 = 4, so MSE = 4 * error rate.\n",
        "print(\"Mean Squared Error:\", mean_squared_error(y_test, y_pred))\n",
        "print(\"Coefficient of Determination:\", r2_score(y_test, y_pred))\n"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mean Absolute Error: 2.1587301587301586\n",
            "Coefficient of Determination: -1.1636363636363636\n"
          ]
        }
      ],
      "execution_count": 21,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-06-16T01:52:50.348Z",
          "iopub.execute_input": "2022-06-16T01:52:50.353Z",
          "iopub.status.idle": "2022-06-16T01:52:50.364Z",
          "shell.execute_reply": "2022-06-16T01:52:50.373Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.13",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}